{
  "nbformat": 4,
  "nbformat_minor": 2,
  "metadata": {
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.10"
    },
    "orig_nbformat": 2,
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3.8.10 64-bit ('.env': venv)"
    },
    "colab": {
      "name": "clip-retrieval-getting-started.ipynb",
      "provenance": []
    },
    "interpreter": {
      "hash": "843de08df30066b821f0437d83317f7e657c9d58c210bb967a72474dd7dcb832"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "## Install"
      ],
      "metadata": {
        "id": "uT9FwUjk_lRD"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
        "%pip install clip-retrieval img2dataset"
      ],
      "outputs": [],
      "metadata": {
        "id": "LIJwsAPIjvnX",
        "outputId": "a8ea45f0-0cf7-4b5c-836a-c7c4f5bbed1f",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Get some image URLs"
      ],
      "metadata": {
        "id": "q5-9yk7y_qlW"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# The first write truncates the file ('>') so re-running this cell does not append duplicate URLs.\n",
        "!echo 'https://placekitten.com/200/305' > myimglist.txt\n",
        "!echo 'https://placekitten.com/200/304' >> myimglist.txt\n",
        "!echo 'https://placekitten.com/200/303' >> myimglist.txt"
      ],
      "outputs": [],
      "metadata": {
        "id": "SA89YmKtjvnX"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Download the images from the URLs"
      ],
      "metadata": {
        "id": "N8Tbn2Kl_t1N"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Run img2dataset over the URLs in myimglist.txt, writing to image_folder (thread count 64, image size 256).\n",
        "!img2dataset --url_list=myimglist.txt --output_folder=image_folder --thread_count=64 --image_size=256"
      ],
      "outputs": [],
      "metadata": {
        "id": "BVZW6noqjvnY",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "484db3d4-249a-4d61-f2d8-5a0f1817a1b4"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Produce embeddings"
      ],
      "metadata": {
        "id": "FMW4ncir_1Jt"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Produce embeddings for the contents of image_folder; output goes to embedding_folder.\n",
        "!clip-retrieval inference --input_dataset=image_folder --output_folder=embedding_folder"
      ],
      "outputs": [],
      "metadata": {
        "id": "MvNr8NJRjvnZ",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "1484e573-c84d-4593-c7f6-a461b1d516ca"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Recursively list embedding_folder to inspect what the inference step wrote.\n",
        "!ls -R embedding_folder"
      ],
      "outputs": [],
      "metadata": {
        "id": "aBcl0APqjvnZ",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "14bda9f6-e687-43e7-d7ee-754048cc0c2e"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Produce knn indices"
      ],
      "metadata": {
        "id": "Am62ARgs_3_e"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Build the knn index from embedding_folder and store it in index_folder.\n",
        "!clip-retrieval index --embeddings_folder=embedding_folder --index_folder=index_folder"
      ],
      "outputs": [],
      "metadata": {
        "id": "xwzha2vY6OnP",
        "outputId": "63e1d45a-1c13-4f84-8a45-1c7196fb7eb6",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Use the index to get a subset of files"
      ],
      "metadata": {
        "id": "gL_X73OY_6TW"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Query the index in index_folder for \"cat\" and write the results to cat/.\n",
        "!clip-retrieval filter --query=cat --output_folder=cat/ --indice_folder=index_folder"
      ],
      "outputs": [],
      "metadata": {
        "id": "COVo6tHQjvnZ",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "57c02131-5f3a-417b-fd53-36ef5fef1061"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# List the working directory to check which folders exist after the filter step.\n",
        "!ls"
      ],
      "outputs": [],
      "metadata": {
        "id": "wmVuLCKmubsI",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "64547d42-80cc-45a0-d8c6-320f007a77c8"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# Explicit '!' so this cell does not rely on IPython automagic resolving a bare `ls`.\n",
        "!ls -R cat"
      ],
      "outputs": [],
      "metadata": {
        "id": "KOdR2ybtjvna",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "219d38d5-f4b1-46b6-b178-c5da880431d0"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "from IPython.display import Image\n",
        "\n",
        "# Render one file from the cat/ output folder inline as the cell result.\n",
        "first_match_path = 'cat/000000000.jpg'\n",
        "Image(filename=first_match_path)"
      ],
      "outputs": [],
      "metadata": {
        "id": "GHtA2Jlajvna",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 273
        },
        "outputId": "3448f92e-d3de-48ae-8ba6-807d414d45fb"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Run the knn service backend"
      ],
      "metadata": {
        "id": "tcvl9hog_-Lg"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "%%bash\n",
        "# Write the JSON file passed to `clip-retrieval back`: it maps the name example_index to index_folder.\n",
        "cat > indices_paths.json <<'EOF'\n",
        "{\"example_index\": \"index_folder\"}\n",
        "EOF\n",
        "# Install localtunnel globally; the tunnel cell later runs `lt --port 1234`.\n",
        "npm install -g localtunnel"
      ],
      "outputs": [],
      "metadata": {
        "id": "8mKtpVPi6jiZ",
        "outputId": "a7382021-c907-44ba-ecf1-4fd864c63089",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "# After running the next cell, visit the localtunnel URL once, then go to\n",
        "# https://rom1504.github.io/clip-retrieval/?back=<your localtunnel URL here>"
      ],
      "outputs": [],
      "metadata": {
        "id": "cUCDh4cq7RgW"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "source": [
        "from threading import Thread\n",
        "\n",
        "\n",
        "def app():\n",
        "    \"\"\"Run the clip-retrieval backend on port 1234 using indices_paths.json.\"\"\"\n",
        "    !clip-retrieval back --port 1234 --indices-paths indices_paths.json\n",
        "\n",
        "\n",
        "# Start the backend in a background thread so this cell can continue to the tunnel command.\n",
        "# Thread.start() returns None, so its result is not stored.\n",
        "backend_thread = Thread(target=app)\n",
        "backend_thread.start()\n",
        "\n",
        "# Open a localtunnel to port 1234; this command runs in the foreground of the cell.\n",
        "!lt --port 1234"
      ],
      "outputs": [],
      "metadata": {
        "id": "q6SaDruy6SOJ",
        "outputId": "a544d2c1-d3d0-4267-a12b-c41c3d599e1e",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      }
    }
  ]
}
